#################################################################
# Compute consistent MCD (raw or reweighted) estimators under 
# - normality
# - multivariate Student 
# - multivariate power exponential
# /!\ The elliptical distributions are defined such that COV(X)= sigma, 
#######################################################################
library(robustbase)


# ------------------------  Main functions

consistMCD<-function(data, bdp=0.25,dist, df){
  
  # Function to compute consistent MCD location and scatter estimates 
  
  # INPUTS:
  # data: data matrix of dimension n times p
  # bdp : breakdown point
  # dist : assumed distribution used for the computation of consistency factors 
  #     'norm' : multivariate normal
  #     'stud' : multivariate Student
  #     'powerexp' : multivariate power exponential distribution
  #     /!\ The elliptical distributions are defined such that COV(X)= sigma, 
  # df : degree of freedom for the Student or powerexp distribution 
  #       NB: for the power exp, df=1 corresponds to the normal distribution
  # OUTPUTS:
  # center : MCD location consistent estimate
  # cov : MCD scatter consistent estimate

  data<-as.matrix(data)
  p=ncol(data)
  n=nrow(data)
  
  if(n<p) stop("the sample size needs to be larger than the dimension")
  if((dist=="stud" | dist=="powerexp") & is.null(df)) stop("you have to specify a degree of freedom")
  
  MCD<-covMcd(data,alpha=1-bdp)
  qalpha_norm<- qchisq(1-bdp,p)
  calpha_norm<-pchisq(qalpha_norm,p+2)/(1-bdp) # calpha_norm = 1/consistency factor
  
  # Compute the c_alpha
  if(dist=="norm"){
    calpha=calpha_norm
  }else if(dist=="stud"){
    alpha<-(1-bdp)
    K<- (df-2)^(-p/2)*gamma((p+df)/2)/(pi^(p*0.5)*gamma(df/2))
    const<- (1-bdp)*gamma(0.5*p)/(2*pi^(p*0.5))*1/K
    qalpha<-uniroot(stud_int2,interval=c(0,30),p=p,df=df,const=const)$root
    r<-sqrt(qalpha)
    calpha<-1/const *1/p*stud_funct(qalpha,e=2,p=p,df=df) # consistency factor = 1/calpha
    
  }else if( dist=="powerexp"){
    const<-(1-bdp)*gamma(p/(2*df))/(2*df) *((p*gamma(p/(2*df)))/gamma((p+2)/(2*df)))^(0.5*p)
    a=(gamma((p+2)/(2*df))/(p*gamma(p/(2*df))))^df
    qalpha<-uniroot(mvexp_int2,interval=c(1,30),p=p,df=df,const=const)$root
    r=sqrt(qalpha)
    calpha<-1/const*1/p*mvexp_funct(qalpha,e=2,p,df)
  }else stop("This is not a valid dist value. The possible values are 'norm','stud' or 'powerexp'")
  return(list(center=MCD$raw.center, cov=MCD$raw.cov*calpha_norm/calpha))
}


consistRMCD<- function(data, moy0, cov0, dist, df){
  
  # Function to compute consistent one-step reweighted MCD location and scatter estimates 
  
  # INPUTS:
  # data: data matrix of dimension n times p
  # moy0 : initial raw location estimate
  # cov : initial raw scater estimate
  # dist : assumed distribution used for the computation of consistency factors 
  #     'norm' : multivariate normal
  #     'stud' : multivariate Student
  #     'powerexp' : multivariate power exponential distribution
  #     /!\ The elliptical distributions are defined such that COV(X)= sigma, 
  # df : degree of freedom for the Student or powerexp distribution 
  #       NB: for the power exp, df=1 corresponds to the normal distribution
  # OUTPUTS:
  # center : one-step reweighted MCD location consistent estimate
  # cov : one-step reweighted MCD scatter consistent estimate
  
  delta=0.025
  d1<-  1-delta
  
  data<-as.matrix(data)
  n<-nrow(data)
  p<-ncol(data)
  
  if(n<p) stop("the sample size needs to be larger than the dimension")
  if((dist=="stud" | dist=="powerexp") & is.null(df)) stop("you have to specify a degree of freedom")
  
  if(dist=="norm"){
    
    qdelta<- qchisq(1-delta,p)
    d1<- 1-delta
    d3<-  pchisq(qdelta,p+2)
    
  }else if (dist=='stud'){
    
    K<- (df-2)^(-p/2)*gamma((p+df)/2)/(pi^(p*0.5)*gamma(df/2))
    const<- (1-delta)*gamma(0.5*p)/(2*pi^(p*0.5))*1/K
    qdelta<-uniroot(stud_int2,interval=c(0,50),p=p,df=df,const=const)$root
    d1<- 1/const*(1-delta)*stud_funct(qdelta,e=0,p=p,df=df)
    d3<- 1/const*(1-delta)/p*stud_funct(qdelta,e=2,p=p,df=df)
    
  }else if (dist=="powerexp"){
    
    const<-(1-delta)*gamma(p/(2*df))/(2*df) *((p*gamma(p/(2*df)))/gamma((p+2)/(2*df)))^(0.5*p)
    a=(gamma((p+2)/(2*df))/(p*gamma(p/(2*df))))^df
    qdelta<-uniroot(mvexp_int2,interval=c(1,50),p=p,df=df,const=const)$root
    d1<- 1/const*(1-delta)*mvexp_funct(qdelta, e=0,p=p,df=df)
    d3<-1/const*(1-delta)/p*mvexp_funct(qdelta, e=2,p=p,df=df)
  }else stop("This is not a valid dist value. The possible values are 'norm','stud' or 'powerexp'")
  
  indic<-as.numeric(apply(data,1,DistMahala,mu=moy0,sigma=cov0)<qdelta)
  moy<- Reduce("+",lapply(seq_len(n),function(i)data[i,]*indic[i]))/sum(indic)
  cov<- d1/d3*Reduce("+",lapply(seq_len(n),function(i)indic[i]*(data[i,]-moy0)%*%t(data[i,]-moy0)))/sum(indic)
  return(list(center=moy,cov=cov))
}


# ------------------  Auxiliary functions

# Functions involved in the computation of calpha - Student
stud_integrand<-function(r,e,p,df){
  return(r^(p+e-1)/(1+r^2/(df-2))^((p+df)*0.5))
}

stud_integrand2<-function(r,e,p,df)
  return(r^(p+e-1)/(1+r^2/(df-2))^((p+2+df)*0.5))

stud_funct<-function(q,e,p,df){
  return(integrate(stud_integrand,lower=0,upper=sqrt(q),e=e,p=p,df=df)$value)
}

stud_funct2<-function(q,e,p,df)
  return(integrate(stud_integrand2,lower=0,upper=sqrt(q),e=e,p=p,df=df)$value)

stud_int2<-function(q,p,df,const) stud_funct(q,e=0,p=p,df=df)-const


# Functions involved in the computation of calpha - Power exp
mvexp_integrand<-function(r,e,p,df){
  a=(gamma((p+2)/(2*df))/(p*gamma(p/(2*df))))^df
  return(r^(p+e-1)*exp(-a*r^(2*df)))
}
mvexp_funct<-function(q,e,p,df){
  return(integrate(mvexp_integrand,lower=0,upper=sqrt(q),e=e,p=p,df=df)$value)
}
mvexp_int2<-function(q,p,df,const) mvexp_funct(q,e=0,p,df)-const

# squared Mahalanobis distance
DistMahala<-function(x,mu,sigma){
  return((x-mu)%*%solve(sigma) %*% (x-mu))
}
